import os
import pandas as pd
pd.set_option('display.float_format', lambda x: '%.3f' % x)
import numpy as np
from datetime import date, datetime
from time import time
dateparse = lambda x: datetime.strptime(x, '%d/%m/%Y')
from IPython.display import display
from matplotlib import pyplot as plt
import plotly.offline as pyo
import plotly.graph_objs as go
pyo.init_notebook_mode()
from sklearn.model_selection import train_test_split
from aeqlib import viz, quantlib, lisa
FOLDER = 'data'
# Load the price history: a semicolon-separated file whose first column is the
# date index, parsed with the day/month/year `dateparse` helper defined above.
csv_path = f'{FOLDER}/stoxx50.csv'
exx = pd.read_csv(
    csv_path,
    sep=';',
    header=0,
    index_col=0,
    parse_dates=True,
    date_parser=dateparse,
)
exx
| OR FP Equity | DG FP Equity | SAN SQ Equity | ASML NA Equity | PHIA NA Equity | FP FP Equity | AI FP Equity | CS FP Equity | BNP FP Equity | BN FP Equity | ... | SAP GY Equity | RI FP Equity | ADS GY Equity | DTE GY Equity | DPW GY Equity | DAI GY Equity | DB1 GY Equity | VNA GY Equity | SX5T Index | SXXP Index | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Dates | |||||||||||||||||||||
| 2005-01-03 | 56.450 | 24.594 | 8.085 | 11.860 | 19.357 | 39.624 | 35.179 | 17.635 | 51.786 | 32.555 | ... | 32.895 | 43.614 | 30.230 | 16.710 | 17.270 | 35.680 | 22.325 | NaN | 4063.340 | 252.550 |
| 2005-01-04 | 56.900 | 24.840 | 8.085 | 11.640 | 19.063 | 39.476 | 35.179 | 17.549 | 52.123 | 32.743 | ... | 32.250 | 43.729 | 30.075 | 16.840 | 17.060 | 36.050 | 22.400 | NaN | 4064.830 | 253.550 |
| 2005-01-05 | 56.950 | 24.741 | 7.971 | 11.310 | 18.681 | 39.106 | 34.792 | 17.319 | 51.834 | 32.507 | ... | 32.050 | 43.999 | 29.700 | 16.700 | 17.000 | 35.920 | 22.300 | NaN | 4032.100 | 251.850 |
| 2005-01-06 | 57.300 | 25.085 | 7.971 | 11.290 | 18.671 | 39.402 | 34.895 | 17.444 | 52.605 | 32.672 | ... | 32.675 | 43.845 | 29.838 | 16.710 | 17.050 | 36.110 | 22.370 | NaN | 4058.170 | 253.090 |
| 2005-01-07 | 57.850 | 25.208 | 7.936 | 11.240 | 18.622 | 39.526 | 35.127 | 17.568 | 53.232 | 32.956 | ... | 33.335 | 43.999 | 29.663 | 16.780 | 17.150 | 35.930 | 22.300 | NaN | 4076.730 | 254.720 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-01-22 | 296.300 | 80.460 | 2.581 | 462.900 | 45.635 | 36.220 | 132.050 | 19.212 | 43.085 | 54.560 | ... | 104.700 | 153.100 | 278.000 | 15.065 | 42.820 | 59.070 | 132.900 | 55.060 | 7812.310 | 408.540 |
| 2021-01-25 | 299.000 | 78.720 | 2.490 | 461.350 | 46.690 | 35.350 | 131.400 | 18.770 | 41.230 | 54.700 | ... | 105.520 | 151.950 | 269.000 | 14.920 | 42.220 | 57.440 | 133.000 | 55.860 | 7705.460 | 405.130 |
| 2021-01-26 | 301.800 | 77.980 | 2.533 | 458.550 | 47.235 | 35.980 | 134.950 | 18.856 | 41.990 | 55.380 | ... | 109.980 | 153.900 | 276.700 | 14.990 | 42.960 | 58.890 | 134.450 | 55.760 | 7791.520 | 407.700 |
| 2021-01-27 | 297.600 | 76.840 | 2.433 | 440.650 | 46.840 | 35.825 | 136.500 | 18.674 | 40.525 | 55.200 | ... | 109.640 | 152.950 | 270.700 | 15.085 | 41.560 | 57.250 | 132.800 | 55.220 | 7669.110 | 402.980 |
| 2021-01-28 | 297.700 | 77.980 | 2.397 | 436.650 | 46.290 | 35.365 | 135.050 | 18.424 | 40.095 | 55.100 | ... | 107.660 | 156.400 | 269.800 | 14.900 | 41.400 | 56.470 | 133.050 | 55.400 | 7625.060 | 399.090 |
4194 rows × 52 columns
def moving_average(s, window=10):
    """Return the simple moving average of ``s`` over ``window`` rows.

    Args:
        s: pandas Series (or any object exposing ``rolling``).
        window: number of consecutive observations averaged at each step.

    Returns:
        The rolling mean of ``s``; with pandas' default settings the first
        ``window - 1`` entries are NaN because the window is not yet full.
    """
    rolling_view = s.rolling(window=window)
    return rolling_view.mean()
def crossing_sma_strategy(s, w1, w2, shift=0):
    """Build a position series from the crossing of two simple moving averages.

    Args:
        s: price Series.
        w1: window of the first moving average.
        w2: window of the second moving average.
        shift: number of rows by which the signal is delayed before it is
            turned into a position (0 means the crossing is acted on in the
            same row in which it is observed).

    Returns:
        A Series aligned on ``s``: the position starts at 1, switches to +1
        when the ``w1`` average crosses above the ``w2`` average and to -1
        when it crosses below, and is held in between.
    """
    sma1 = moving_average(s, w1)
    sma2 = moving_average(s, w2)
    # sign(sma1 - sma2) is -1/0/+1; half of its row-to-row change is +1 on an
    # upward crossing, -1 on a downward crossing and 0 when nothing changes.
    # NOTE(review): when the two averages are exactly equal the sign passes
    # through 0 and the step is +/-0.5 — confirm this is acceptable.
    signal = np.sign(sma1 - sma2).diff() / 2
    signal = signal.shift(shift)
    if signal.empty:
        # Nothing to seed or fill; avoids an IndexError on ``iloc[0]``.
        return signal
    # Keep only the crossing events, seed the first row with a long position,
    # then carry each position forward until the next crossing.
    signal = signal.replace(0, np.nan)
    signal.iloc[0] = 1
    # ``Series.pad`` is deprecated/removed in recent pandas; ``ffill`` is the
    # equivalent forward fill available in every pandas version.
    signal = signal.ffill()
    return signal
# Work on the SX5T index; `s` is an independent copy of the series.
asset = exx['SX5T Index']
s = asset.copy()

# Raw crossing signal for the 200/500 pair, kept for inspection.
w1, w2 = 200, 500
sma1, sma2 = (moving_average(s, w) for w in (w1, w2))
signal = np.sign(sma1 - sma2).diff().div(2)

# Example strategy with windows 200 and 500 (no fees, no execution delay),
# rebased on the first level of the index.
sig = crossing_sma_strategy(asset, w1, w2)
strat = (sig * asset.pct_change().fillna(0) + 1).cumprod() * asset.iloc[0]

# Plot a range of moving averages next to the strategy.
name = asset.name
all_windows = [1, 5, 10, 12, 50, 100, 200, 500]
df = pd.concat(
    [moving_average(asset, w).rename(f'{name}_SMA_{w}') for w in all_windows],
    axis=1,
)
fig = viz.easy_plot(pd.concat([df, strat], axis=1), show=False)
fig.show()
def sma_strategy_wrapper(s, w1, w2, fees=0.0005, shift=2):
    """Return the total return of the SMA-crossing strategy on ``s``.

    Args:
        s: price Series.
        w1: window of the first moving average.
        w2: window of the second moving average.
        fees: cost charged per unit of absolute position change.
        shift: delay (in rows) applied to the crossing signal.

    Returns:
        Final strategy value divided by its initial value, minus 1.
    """
    position = crossing_sma_strategy(s, w1, w2, shift=shift)
    asset_returns = s.pct_change().fillna(0)
    # Absolute change in position: 2 when flipping between +1 and -1.
    turnover = abs(position.diff().fillna(0))
    growth = position * asset_returns - turnover * fees + 1
    equity = growth.cumprod() * s.iloc[0]
    return equity.iloc[-1] / equity.iloc[0] - 1
from itertools import product

m = range(1, 501)

# Four scenarios, run in this order: (fees, shift) =
# (0, 0), (0.0005, 0), (0, 2), (0.0005, 2).
for scenario_fees, scenario_shift in [(0, 0), (0.0005, 0), (0, 2), (0.0005, 2)]:

    def func(w, fees=scenario_fees, shift=scenario_shift):
        """Score one (w1, w2) row of the grid for the current scenario."""
        return sma_strategy_wrapper(asset, w[0], w[1], fees=fees, shift=shift)

    # 1% random sample (fixed seed) of all window pairs, back in grid order.
    grid = (
        pd.DataFrame(product(m, m))
        .sample(frac=0.01, random_state=3)
        .sort_index()
    )

    # Time the evaluation of the sampled grid.
    t1 = time()
    grid['results'] = grid.apply(func, axis=1)
    t2 = time()
    print(t2 - t1)
    display(grid.sort_values(by='results'))
14.747560262680054
| 0 | 1 | results | |
|---|---|---|---|
| 5000 | 11 | 1 | -1.000 |
| 32000 | 65 | 1 | -1.000 |
| 92000 | 185 | 1 | -0.992 |
| 124500 | 250 | 1 | -0.992 |
| 137500 | 276 | 1 | -0.985 |
| ... | ... | ... | ... |
| 1516 | 4 | 17 | 27.731 |
| 309 | 1 | 310 | 27.927 |
| 2006 | 5 | 7 | 39.985 |
| 251 | 1 | 252 | 85.900 |
| 7 | 1 | 8 | 5661766894.088 |
2500 rows × 3 columns
11.619581937789917
| 0 | 1 | results | |
|---|---|---|---|
| 5000 | 11 | 1 | -1.000 |
| 32000 | 65 | 1 | -1.000 |
| 124500 | 250 | 1 | -0.993 |
| 92000 | 185 | 1 | -0.993 |
| 137500 | 276 | 1 | -0.987 |
| ... | ... | ... | ... |
| 339 | 1 | 340 | 21.689 |
| 338 | 1 | 339 | 22.711 |
| 309 | 1 | 310 | 24.857 |
| 251 | 1 | 252 | 73.531 |
| 7 | 1 | 8 | 2446296971.245 |
2500 rows × 3 columns
10.17225193977356
| 0 | 1 | results | |
|---|---|---|---|
| 14536 | 30 | 37 | -0.855 |
| 7 | 1 | 8 | -0.850 |
| 5065 | 11 | 66 | -0.839 |
| 236495 | 473 | 496 | -0.838 |
| 214996 | 430 | 497 | -0.834 |
| ... | ... | ... | ... |
| 249953 | 500 | 454 | 2.246 |
| 216814 | 434 | 315 | 2.315 |
| 217314 | 435 | 315 | 2.351 |
| 248441 | 497 | 442 | 2.636 |
| 249439 | 499 | 440 | 3.204 |
2500 rows × 3 columns
10.679455995559692
| 0 | 1 | results | |
|---|---|---|---|
| 7 | 1 | 8 | -0.936 |
| 2006 | 5 | 7 | -0.880 |
| 14536 | 30 | 37 | -0.876 |
| 5065 | 11 | 66 | -0.854 |
| 236495 | 473 | 496 | -0.842 |
| ... | ... | ... | ... |
| 249953 | 500 | 454 | 2.198 |
| 216814 | 434 | 315 | 2.289 |
| 217314 | 435 | 315 | 2.324 |
| 248441 | 497 | 442 | 2.582 |
| 249439 | 499 | 440 | 3.150 |
2500 rows × 3 columns
# Same (1, 8) window pair under the two extreme scenarios: no fees and no
# delay, then fees with a 2-row delay.
for scenario_fees, scenario_shift in ((0.0, 0), (0.0005, 2)):
    print(sma_strategy_wrapper(asset, 1, 8, fees=scenario_fees, shift=scenario_shift))
5661766894.087999 -0.9358196371136654
# Distribution of the results of the last scenario, by decile.
grid.loc[:, 'results'].describe(percentiles=np.linspace(0, 1, num=11))
count 2500.000 mean 0.025 std 0.501 min -0.936 0% -0.936 10% -0.518 20% -0.423 30% -0.326 40% -0.230 50% -0.105 60% 0.078 70% 0.280 80% 0.466 90% 0.713 100% 3.150 max 3.150 Name: results, dtype: float64
# Window pairs of the last scenario, from worst to best result.
grid.sort_values(by='results', ascending=True)
| 0 | 1 | results | |
|---|---|---|---|
| 7 | 1 | 8 | -0.936 |
| 2006 | 5 | 7 | -0.880 |
| 14536 | 30 | 37 | -0.876 |
| 5065 | 11 | 66 | -0.854 |
| 236495 | 473 | 496 | -0.842 |
| ... | ... | ... | ... |
| 249953 | 500 | 454 | 2.198 |
| 216814 | 434 | 315 | 2.289 |
| 217314 | 435 | 315 | 2.324 |
| 248441 | 497 | 442 | 2.582 |
| 249439 | 499 | 440 | 3.150 |
2500 rows × 3 columns
# Sampled window pairs lying within `tol` of the best pair (499, 440), to see
# how stable the result is around the optimum.
tol = 20
grid.loc[((grid[0] - 499).abs() < tol) & ((grid[1] - 440).abs() < tol)]
| 0 | 1 | results | |
|---|---|---|---|
| 240450 | 481 | 451 | 0.821 |
| 242439 | 485 | 440 | 1.710 |
| 242446 | 485 | 447 | 0.447 |
| 243924 | 488 | 425 | 2.121 |
| 244922 | 490 | 423 | 1.984 |
| 244954 | 490 | 455 | 0.742 |
| 246950 | 494 | 451 | 1.433 |
| 246957 | 494 | 458 | 1.553 |
| 247422 | 495 | 423 | 1.649 |
| 248441 | 497 | 442 | 2.582 |
| 249439 | 499 | 440 | 3.150 |
| 249953 | 500 | 454 | 2.198 |
En séparant l'échantillon de test de l'échantillon d'apprentissage…
def optimize(asset, w_min, w_max, fees=0.0005, shift=2, test_size=0.2,
             frac=0.01, random_state=3):
    """Pick the best SMA window pair on a training window, then score it.

    The series is split chronologically (no shuffling). A random sample of
    the ``(w1, w2)`` grid is evaluated on the training part only, and the
    best pair is then re-evaluated on the complete series.

    Args:
        asset: price Series.
        w_min: smallest window tried (inclusive).
        w_max: largest window tried (inclusive).
        fees: cost per unit of absolute position change, used for both the
            search and the final evaluation.
        shift: signal delay in rows, used for both the search and the final
            evaluation.
        test_size: fraction of the series held out at the end.
        frac: fraction of the full window grid that is sampled.
        random_state: seed of the grid sampling.

    Returns:
        Tuple ``(grid, IS_results, complete_result)``: the evaluated grid
        with a ``results`` column, the best in-sample result, and the result
        of the best pair on the whole of ``asset`` (training and held-out
        parts together).

    Raises:
        ValueError: if ``w_min`` is greater than ``w_max``.
    """
    if w_min > w_max:
        raise ValueError(f'w_min ({w_min}) must not exceed w_max ({w_max})')

    # Only the training part is used for the search; the hold-out is unused.
    train, _ = train_test_split(asset, test_size=test_size, shuffle=False)

    m = range(w_min, w_max + 1)
    candidates = pd.DataFrame(product(m, m))
    grid = candidates.sample(frac=frac, random_state=random_state)
    if grid.empty:
        # For small ranges the sample size rounds to zero rows; the grid is
        # then small enough to be evaluated exhaustively.
        grid = candidates
    grid = grid.sort_index()

    grid['results'] = grid.apply(
        lambda w: sma_strategy_wrapper(train, w[0], w[1], fees=fees, shift=shift),
        axis=1)

    best = grid.sort_values('results', ascending=False).iloc[0, :]
    w0, w1, IS_results = best
    print(w0)
    print(w1)

    # The row is float-typed, so the windows are cast back to int; the same
    # fees and shift as in the search are passed explicitly.
    complete_result = sma_strategy_wrapper(
        asset, int(w0), int(w1), fees=fees, shift=shift)
    return grid, IS_results, complete_result
# Search windows between 5 and 500 on the training part of the index.
grid, IS, complete = optimize(asset, w_min=5, w_max=500)
52.0 143.0
# Example of strategy with the windows found by the optimisation (52 and 143),
# with fees and a 2-row execution delay.
w1, w2 = 52, 143
shift = 2
fees = 0.0005
sig = crossing_sma_strategy(s, w1, w2, shift=shift)
strat = (
    sig * s.pct_change().fillna(0)
    - sig.diff().fillna(0).abs() * fees
    + 1
).cumprod() * s.iloc[0]

# Same chronological split as in `optimize`, used to mark the end of training.
train, test = train_test_split(s, shuffle=False, test_size=0.2)

# Plot the index, both moving averages and the strategy.
name = asset.name
all_windows = [1, w1, w2]
df = pd.concat(
    [moving_average(asset, w).rename(f'{name}_SMA_{w}') for w in all_windows],
    axis=1,
)
fig = viz.easy_plot(pd.concat([df, strat], axis=1), show=False)
# Vertical line at the last training date.
fig.add_vline(x=train.index[-1])
fig.show()
def identity(stock, benchmark='SX5T Index'):
    """Compute a risk/performance profile of ``stock`` over 1 and 3 years.

    Returns are taken from ``stock`` and from the ``benchmark`` column of the
    module-level ``exx`` frame. Each horizon uses the last ``252 * years``
    return observations of the stock, with the benchmark re-indexed on the
    same dates. The metrics themselves come from the project's ``quantlib``
    helpers, whose exact definitions are not visible here.

    Args:
        stock: price Series of the instrument.
        benchmark: name of the ``exx`` column used as benchmark.

    Returns:
        A Series of metrics whose labels are suffixed with ``_1y`` or ``_3y``.
    """
    bmk = exx[benchmark]
    stock_returns = stock.pct_change().dropna()
    bmk_returns = bmk.pct_change().dropna()

    profiles = []
    for years in (1, 3):
        stock_ = stock_returns.tail(years * 252)
        bmk_ = bmk_returns.reindex(index=stock_.index)

        def versus_benchmark(metric, stock_=stock_, bmk_=bmk_):
            """Apply a two-column quantlib metric to stock and benchmark."""
            pair = pd.concat([stock_, bmk_], axis=1)
            return metric(pair, stock_.name, bmk_.name)

        metrics = {
            "perf": quantlib.annualized_return(stock_),
            "vol": quantlib.volatility(stock_),
            "sharpe": quantlib.sharpe_corrected(stock_),
            "semi_vol": quantlib.semi_volatility(stock_),
            "sortino": quantlib.sortino_corrected(stock_),
            "calmar": quantlib.calmar_corrected(stock_),
            "mdd": quantlib.max_draw_down(stock_),
            # These two helpers take a frame; keep the entry of its one column.
            "var": quantlib.value_at_risk(stock_.to_frame()).iloc[0],
            "cvar": quantlib.expected_shortfall(stock_.to_frame()).iloc[0],
            "tracking": versus_benchmark(quantlib.tracking_error),
            "information": versus_benchmark(quantlib.information_ratio),
            "beta": versus_benchmark(quantlib.beta_ptf),
            "alpha": versus_benchmark(quantlib.alpha_ptf),
            "up_days": versus_benchmark(quantlib.up_days_ptf),
        }
        profile = pd.Series(metrics)
        profile.index = [f'{label}_{years}y' for label in profile.index]
        profiles.append(profile)
    return pd.concat(profiles)
# Profile every stock: the last two columns of `exx` (the SX5T and SXXP
# indices) are left out. The result has one column per stock.
constituents = exx.iloc[:, :-2]
all_ids = constituents.apply(identity, axis=0)
all_ids.T.sort_values(by='perf_1y')
| perf_1y | vol_1y | sharpe_1y | semi_vol_1y | sortino_1y | calmar_1y | mdd_1y | var_1y | cvar_1y | tracking_1y | ... | sortino_3y | calmar_3y | mdd_3y | var_3y | cvar_3y | tracking_3y | information_3y | beta_3y | alpha_3y | up_days_3y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AIR FP Equity | -0.385 | 0.737 | -0.284 | 0.681 | -0.262 | 0.247 | -0.641 | -0.129 | -0.176 | 0.556 | ... | -0.021 | 0.031 | -0.647 | -0.087 | -0.130 | 0.352 | -0.233 | 1.536 | -0.000 | 0.451 |
| SAN SQ Equity | -0.368 | 0.584 | -0.215 | 0.552 | -0.203 | 0.225 | -0.611 | -0.099 | -0.131 | 0.387 | ... | -0.086 | 0.166 | -0.724 | -0.059 | -0.092 | 0.264 | -1.004 | 1.401 | -0.001 | 0.430 |
| ENI IM Equity | -0.360 | 0.520 | -0.187 | 0.563 | -0.203 | 0.198 | -0.551 | -0.070 | -0.153 | 0.309 | ... | -0.054 | 0.096 | -0.649 | -0.058 | -0.098 | 0.209 | -0.877 | 1.241 | -0.001 | 0.440 |
| BAYN GY Equity | -0.352 | 0.423 | -0.149 | 0.439 | -0.155 | 0.169 | -0.480 | -0.071 | -0.118 | 0.277 | ... | -0.069 | 0.118 | -0.610 | -0.051 | -0.095 | 0.251 | -0.909 | 1.079 | -0.001 | 0.450 |
| INGA NA Equity | -0.332 | 0.628 | -0.209 | 0.625 | -0.208 | 0.200 | -0.603 | -0.094 | -0.144 | 0.411 | ... | -0.084 | 0.144 | -0.703 | -0.081 | -0.109 | 0.277 | -0.864 | 1.511 | -0.001 | 0.451 |
| SAF FP Equity | -0.288 | 0.712 | -0.205 | 0.689 | -0.198 | 0.184 | -0.639 | -0.151 | -0.187 | 0.512 | ... | 0.137 | -0.093 | -0.646 | -0.077 | -0.133 | 0.326 | 0.078 | 1.537 | 0.000 | 0.475 |
| ISP IM Equity | -0.285 | 0.441 | -0.126 | 0.472 | -0.135 | 0.140 | -0.493 | -0.086 | -0.130 | 0.236 | ... | -0.058 | 0.095 | -0.587 | -0.059 | -0.097 | 0.221 | -0.887 | 1.178 | -0.001 | 0.455 |
| AMS SQ Equity | -0.272 | 0.576 | -0.157 | 0.541 | -0.147 | 0.143 | -0.524 | -0.088 | -0.121 | 0.422 | ... | -0.013 | 0.020 | -0.571 | -0.062 | -0.089 | 0.280 | -0.249 | 1.202 | -0.000 | 0.479 |
| CS FP Equity | -0.271 | 0.464 | -0.126 | 0.438 | -0.119 | 0.138 | -0.510 | -0.084 | -0.125 | 0.241 | ... | -0.020 | 0.034 | -0.510 | -0.050 | -0.085 | 0.167 | -0.608 | 1.163 | -0.000 | 0.459 |
| DG FP Equity | -0.262 | 0.548 | -0.143 | 0.544 | -0.142 | 0.122 | -0.466 | -0.111 | -0.135 | 0.323 | ... | -0.005 | 0.006 | -0.466 | -0.063 | -0.102 | 0.211 | -0.227 | 1.324 | -0.000 | 0.460 |
| BNP FP Equity | -0.252 | 0.568 | -0.143 | 0.545 | -0.137 | 0.137 | -0.545 | -0.109 | -0.130 | 0.353 | ... | -0.052 | 0.086 | -0.617 | -0.066 | -0.096 | 0.242 | -0.721 | 1.401 | -0.001 | 0.459 |
| BN FP Equity | -0.250 | 0.299 | -0.075 | 0.315 | -0.079 | 0.090 | -0.362 | -0.049 | -0.063 | 0.281 | ... | -0.015 | 0.026 | -0.429 | -0.044 | -0.058 | 0.203 | -0.473 | 0.560 | -0.000 | 0.479 |
| ABI BB Equity | -0.229 | 0.536 | -0.123 | 0.561 | -0.129 | 0.125 | -0.546 | -0.109 | -0.129 | 0.332 | ... | -0.067 | 0.117 | -0.670 | -0.084 | -0.111 | 0.257 | -0.818 | 1.233 | -0.001 | 0.452 |
| FP FP Equity | -0.222 | 0.520 | -0.116 | 0.503 | -0.112 | 0.116 | -0.521 | -0.104 | -0.149 | 0.315 | ... | -0.030 | 0.053 | -0.613 | -0.055 | -0.098 | 0.212 | -0.572 | 1.238 | -0.000 | 0.444 |
| ITX SQ Equity | -0.217 | 0.415 | -0.090 | 0.376 | -0.082 | 0.081 | -0.371 | -0.058 | -0.077 | 0.263 | ... | 0.034 | -0.026 | -0.373 | -0.050 | -0.062 | 0.216 | -0.116 | 0.959 | -0.000 | 0.495 |
| ENGI FP Equity | -0.205 | 0.407 | -0.084 | 0.447 | -0.092 | 0.098 | -0.477 | -0.066 | -0.109 | 0.250 | ... | -0.004 | 0.006 | -0.477 | -0.050 | -0.076 | 0.196 | -0.238 | 0.944 | -0.000 | 0.481 |
| MUV2 GY Equity | -0.201 | 0.481 | -0.097 | 0.443 | -0.089 | 0.098 | -0.484 | -0.074 | -0.115 | 0.288 | ... | 0.208 | -0.134 | -0.484 | -0.047 | -0.082 | 0.192 | 0.156 | 1.115 | 0.000 | 0.508 |
| ALV GY Equity | -0.187 | 0.437 | -0.082 | 0.414 | -0.078 | 0.091 | -0.487 | -0.087 | -0.114 | 0.213 | ... | -0.001 | 0.002 | -0.487 | -0.046 | -0.078 | 0.146 | -0.269 | 1.161 | -0.000 | 0.479 |
| EL FP Equity | -0.155 | 0.366 | -0.057 | 0.386 | -0.060 | 0.053 | -0.342 | -0.062 | -0.080 | 0.243 | ... | 0.056 | -0.044 | -0.352 | -0.051 | -0.067 | 0.194 | -0.100 | 0.830 | 0.000 | 0.509 |
| SAP GY Equity | -0.151 | 0.397 | -0.060 | 0.472 | -0.071 | 0.055 | -0.366 | -0.073 | -0.132 | 0.281 | ... | 0.209 | -0.191 | -0.366 | -0.049 | -0.085 | 0.219 | 0.161 | 0.960 | 0.000 | 0.517 |
| SAN FP Equity | -0.151 | 0.272 | -0.041 | 0.270 | -0.041 | 0.035 | -0.234 | -0.050 | -0.063 | 0.274 | ... | 0.310 | -0.288 | -0.234 | -0.033 | -0.048 | 0.205 | 0.158 | 0.550 | 0.000 | 0.487 |
| DB1 GY Equity | -0.132 | 0.355 | -0.047 | 0.360 | -0.047 | 0.049 | -0.370 | -0.056 | -0.091 | 0.255 | ... | 0.247 | -0.176 | -0.370 | -0.042 | -0.063 | 0.204 | 0.149 | 0.747 | 0.000 | 0.503 |
| VOW3 GY Equity | -0.093 | 0.535 | -0.050 | 0.540 | -0.050 | 0.046 | -0.496 | -0.112 | -0.131 | 0.310 | ... | 0.002 | -0.001 | -0.530 | -0.063 | -0.098 | 0.236 | -0.145 | 1.393 | -0.000 | 0.468 |
| KER FP Equity | -0.082 | 0.408 | -0.034 | 0.396 | -0.033 | 0.033 | -0.402 | -0.067 | -0.092 | 0.241 | ... | 0.457 | -0.376 | -0.414 | -0.055 | -0.080 | 0.242 | 0.499 | 1.101 | 0.001 | 0.512 |
| ADS GY Equity | -0.072 | 0.420 | -0.030 | 0.428 | -0.031 | 0.031 | -0.430 | -0.067 | -0.100 | 0.271 | ... | 0.474 | -0.328 | -0.472 | -0.057 | -0.075 | 0.241 | 0.497 | 1.002 | 0.001 | 0.520 |
| DTE GY Equity | -0.046 | 0.299 | -0.014 | 0.315 | -0.014 | 0.016 | -0.348 | -0.052 | -0.075 | 0.181 | ... | 0.179 | -0.114 | -0.348 | -0.043 | -0.058 | 0.153 | 0.032 | 0.718 | 0.000 | 0.470 |
| CRH ID Equity | -0.043 | 0.488 | -0.021 | 0.486 | -0.021 | 0.022 | -0.517 | -0.090 | -0.122 | 0.315 | ... | 0.233 | -0.152 | -0.523 | -0.052 | -0.088 | 0.228 | 0.195 | 1.175 | 0.000 | 0.487 |
| AI FP Equity | -0.027 | 0.304 | -0.008 | 0.324 | -0.009 | 0.008 | -0.293 | -0.057 | -0.082 | 0.192 | ... | 0.546 | -0.438 | -0.293 | -0.039 | -0.059 | 0.142 | 0.659 | 0.808 | 0.000 | 0.512 |
| RI FP Equity | -0.026 | 0.280 | -0.007 | 0.280 | -0.007 | 0.008 | -0.286 | -0.038 | -0.059 | 0.237 | ... | 0.253 | -0.170 | -0.319 | -0.037 | -0.047 | 0.192 | 0.101 | 0.596 | 0.000 | 0.481 |
| BAS GY Equity | -0.005 | 0.431 | -0.002 | 0.441 | -0.002 | 0.002 | -0.389 | -0.067 | -0.097 | 0.221 | ... | -0.029 | 0.053 | -0.565 | -0.054 | -0.074 | 0.171 | -0.752 | 1.188 | -0.000 | 0.444 |
| ENEL IM Equity | 0.006 | 0.379 | 0.016 | 0.424 | 0.014 | -0.015 | -0.390 | -0.058 | -0.119 | 0.250 | ... | 0.661 | -0.490 | -0.390 | -0.040 | -0.074 | 0.198 | 0.791 | 0.835 | 0.001 | 0.511 |
| LIN GY Equity | 0.016 | 0.363 | 0.045 | 0.345 | 0.047 | -0.049 | -0.333 | -0.060 | -0.068 | 0.183 | ... | 0.509 | -0.453 | -0.333 | -0.050 | -0.061 | 0.173 | 0.457 | 0.990 | 0.000 | 0.491 |
| VIV FP Equity | 0.026 | 0.325 | 0.079 | 0.355 | 0.072 | -0.083 | -0.308 | -0.067 | -0.090 | 0.251 | ... | 0.305 | -0.223 | -0.335 | -0.046 | -0.067 | 0.196 | 0.203 | 0.709 | 0.000 | 0.492 |
| BMW GY Equity | 0.037 | 0.454 | 0.082 | 0.465 | 0.080 | -0.085 | -0.438 | -0.083 | -0.118 | 0.251 | ... | -0.020 | 0.038 | -0.596 | -0.054 | -0.079 | 0.192 | -0.517 | 1.152 | -0.000 | 0.476 |
| PHIA NA Equity | 0.053 | 0.324 | 0.163 | 0.318 | 0.166 | -0.153 | -0.344 | -0.049 | -0.068 | 0.258 | ... | 0.509 | -0.399 | -0.352 | -0.046 | -0.066 | 0.208 | 0.509 | 0.772 | 0.000 | 0.508 |
| VNA GY Equity | 0.059 | 0.319 | 0.186 | 0.315 | 0.188 | -0.207 | -0.286 | -0.056 | -0.076 | 0.300 | ... | 0.500 | -0.425 | -0.286 | -0.043 | -0.057 | 0.241 | 0.359 | 0.475 | 0.000 | 0.487 |
| AD NA Equity | 0.068 | 0.260 | 0.261 | 0.261 | 0.260 | -0.321 | -0.211 | -0.054 | -0.064 | 0.336 | ... | 0.458 | -0.424 | -0.233 | -0.036 | -0.052 | 0.247 | 0.259 | 0.323 | 0.000 | 0.504 |
| IBE SQ Equity | 0.081 | 0.322 | 0.252 | 0.332 | 0.244 | -0.296 | -0.273 | -0.049 | -0.088 | 0.230 | ... | 0.984 | -0.840 | -0.273 | -0.038 | -0.058 | 0.188 | 1.038 | 0.669 | 0.001 | 0.519 |
| NOKIA FH Equity | 0.083 | 0.500 | 0.165 | 0.512 | 0.161 | -0.180 | -0.459 | -0.076 | -0.138 | 0.407 | ... | -0.012 | 0.017 | -0.616 | -0.054 | -0.117 | 0.328 | -0.190 | 0.921 | -0.000 | 0.474 |
| OR FP Equity | 0.098 | 0.298 | 0.329 | 0.292 | 0.337 | -0.413 | -0.238 | -0.057 | -0.066 | 0.227 | ... | 0.770 | -0.739 | -0.246 | -0.038 | -0.052 | 0.180 | 0.818 | 0.724 | 0.001 | 0.529 |
| MC FP Equity | 0.195 | 0.362 | 0.540 | 0.364 | 0.537 | -0.607 | -0.322 | -0.061 | -0.075 | 0.193 | ... | 0.918 | -0.796 | -0.344 | -0.051 | -0.065 | 0.182 | 1.316 | 1.057 | 0.001 | 0.532 |
| KNEBV FH Equity | 0.212 | 0.253 | 0.839 | 0.265 | 0.799 | -0.960 | -0.221 | -0.041 | -0.043 | 0.299 | ... | 0.871 | -0.707 | -0.261 | -0.036 | -0.040 | 0.226 | 0.661 | 0.450 | 0.001 | 0.495 |
| DPW GY Equity | 0.246 | 0.389 | 0.632 | 0.420 | 0.585 | -0.574 | -0.428 | -0.077 | -0.099 | 0.246 | ... | 0.108 | -0.065 | -0.497 | -0.049 | -0.073 | 0.190 | -0.014 | 0.962 | 0.000 | 0.515 |
| SU FP Equity | 0.264 | 0.384 | 0.686 | 0.420 | 0.628 | -0.762 | -0.346 | -0.068 | -0.101 | 0.195 | ... | 0.663 | -0.578 | -0.346 | -0.046 | -0.073 | 0.163 | 1.016 | 1.095 | 0.001 | 0.525 |
| DAI GY Equity | 0.284 | 0.596 | 0.475 | 0.567 | 0.500 | -0.563 | -0.504 | -0.100 | -0.143 | 0.354 | ... | -0.023 | 0.041 | -0.686 | -0.068 | -0.104 | 0.253 | -0.372 | 1.555 | -0.000 | 0.458 |
| SIE GY Equity | 0.294 | 0.412 | 0.713 | 0.409 | 0.719 | -0.662 | -0.444 | -0.068 | -0.101 | 0.203 | ... | 0.345 | -0.209 | -0.496 | -0.048 | -0.072 | 0.159 | 0.432 | 1.124 | 0.000 | 0.492 |
| PRX NA Equity | 0.332 | 0.403 | 0.824 | 0.407 | 0.816 | -1.041 | -0.319 | -0.060 | -0.075 | 0.378 | ... | 0.571 | -0.652 | -0.333 | -0.059 | -0.071 | 0.340 | 0.600 | 0.636 | 0.001 | 0.507 |
| ASML NA Equity | 0.502 | 0.405 | 1.240 | 0.427 | 1.175 | -1.373 | -0.365 | -0.069 | -0.095 | 0.281 | ... | 1.116 | -1.033 | -0.365 | -0.050 | -0.071 | 0.234 | 1.467 | 1.044 | 0.001 | 0.552 |
| FLTR ID Equity | 0.510 | 0.501 | 1.018 | 0.490 | 1.040 | -1.147 | -0.445 | -0.095 | -0.129 | 0.422 | ... | 0.568 | -0.448 | -0.461 | -0.062 | -0.094 | 0.340 | 0.505 | 0.825 | 0.001 | 0.484 |
| ADYEN NA Equity | 1.021 | 0.391 | 2.612 | 0.394 | 2.594 | -4.322 | -0.236 | -0.055 | -0.062 | 0.411 | ... | 1.567 | -1.471 | -0.443 | -0.063 | -0.084 | 0.665 | 0.945 | 0.625 | 0.003 | 0.541 |
50 rows × 28 columns
# Standardise each metric across stocks (subtract the mean, divide by the
# standard deviation), keeping the metrics-by-stocks orientation.
all_ids = all_ids.T.pipe(
    lambda per_stock: (per_stock - per_stock.mean()) / per_stock.std()
).T
infos = lisa.get_tickers(all_ids.columns)[['gics_sector_name', 'country', 'name']]

from sklearn.cluster import KMeans

# Cluster the stocks (one row per stock) into 5 groups, with a fixed seed.
kmeans = KMeans(n_clusters=5, random_state=0)
pred = kmeans.fit_predict(all_ids.T)
cluster_labels = pd.Series(pred, index=all_ids.columns, name='cluster')
abcd = pd.concat([infos, cluster_labels], axis=1)
# abcd.sort_values('cluster').to_csv(f'{FOLDER}/cluster.csv', sep=';')
abcd = abcd.sort_values(by=['cluster', 'gics_sector_name'])
abcd.set_index('cluster', append=True)
# Reading of the clusters:
# 0 is the closest to 2: quality / growth with LVMH, ASML, OR, but tempered by SU and IBE...
# 1 is very financial and cyclical (not far from 3)
# 2 looks fairly quality & defensive
# 3 looks fairly value, with exposure to the airline, automotive and banking sectors
# 4 is an outlier
| gics_sector_name | country | name | ||
|---|---|---|---|---|
| cluster | ||||
| FLTR ID Equity | 0 | Consumer Discretionary | IRELAND | FLUTTER ENTERTAINMENT PLC |
| MC FP Equity | 0 | Consumer Discretionary | FRANCE | LVMH MOET HENNESSY LOUIS VUI |
| PRX NA Equity | 0 | Consumer Discretionary | NETHERLANDS | PROSUS NV |
| OR FP Equity | 0 | Consumer Staples | FRANCE | L'OREAL |
| KNEBV FH Equity | 0 | Industrials | FINLAND | KONE OYJ-B |
| SIE GY Equity | 0 | Industrials | GERMANY | SIEMENS AG-REG |
| SU FP Equity | 0 | Industrials | FRANCE | SCHNEIDER ELECTRIC SE |
| ASML NA Equity | 0 | Information Technology | NETHERLANDS | ASML HOLDING NV |
| IBE SQ Equity | 0 | Utilities | SPAIN | IBERDROLA SA |
| BMW GY Equity | 1 | Consumer Discretionary | GERMANY | BAYERISCHE MOTOREN WERKE AG |
| ENI IM Equity | 1 | Energy | ITALY | ENI SPA |
| FP FP Equity | 1 | Energy | FRANCE | TOTAL SE |
| ALV GY Equity | 1 | Financials | GERMANY | ALLIANZ SE-REG |
| CS FP Equity | 1 | Financials | FRANCE | AXA SA |
| ISP IM Equity | 1 | Financials | ITALY | INTESA SANPAOLO |
| MUV2 GY Equity | 1 | Financials | GERMANY | MUENCHENER RUECKVER AG-REG |
| BAYN GY Equity | 1 | Health Care | GERMANY | BAYER AG-REG |
| BAS GY Equity | 1 | Materials | GERMANY | BASF SE |
| CRH ID Equity | 1 | Materials | IRELAND | CRH PLC |
| ENGI FP Equity | 1 | Utilities | FRANCE | ENGIE |
| DTE GY Equity | 2 | Communication Services | GERMANY | DEUTSCHE TELEKOM AG-REG |
| VIV FP Equity | 2 | Communication Services | FRANCE | VIVENDI |
| ADS GY Equity | 2 | Consumer Discretionary | GERMANY | ADIDAS AG |
| EL FP Equity | 2 | Consumer Discretionary | FRANCE | ESSILORLUXOTTICA |
| ITX SQ Equity | 2 | Consumer Discretionary | SPAIN | INDUSTRIA DE DISENO TEXTIL |
| KER FP Equity | 2 | Consumer Discretionary | FRANCE | KERING |
| AD NA Equity | 2 | Consumer Staples | NETHERLANDS | KONINKLIJKE AHOLD DELHAIZE N |
| BN FP Equity | 2 | Consumer Staples | FRANCE | DANONE |
| RI FP Equity | 2 | Consumer Staples | FRANCE | PERNOD RICARD SA |
| DB1 GY Equity | 2 | Financials | GERMANY | DEUTSCHE BOERSE AG |
| PHIA NA Equity | 2 | Health Care | NETHERLANDS | KONINKLIJKE PHILIPS NV |
| SAN FP Equity | 2 | Health Care | FRANCE | SANOFI |
| DPW GY Equity | 2 | Industrials | GERMANY | DEUTSCHE POST AG-REG |
| SAP GY Equity | 2 | Information Technology | GERMANY | SAP SE |
| AI FP Equity | 2 | Materials | FRANCE | AIR LIQUIDE SA |
| LIN GY Equity | 2 | Materials | BRITAIN | LINDE PLC |
| VNA GY Equity | 2 | Real Estate | GERMANY | VONOVIA SE |
| ENEL IM Equity | 2 | Utilities | ITALY | ENEL SPA |
| DAI GY Equity | 3 | Consumer Discretionary | GERMANY | DAIMLER AG-REGISTERED SHARES |
| VOW3 GY Equity | 3 | Consumer Discretionary | GERMANY | VOLKSWAGEN AG-PREF |
| ABI BB Equity | 3 | Consumer Staples | BELGIUM | ANHEUSER-BUSCH INBEV SA/NV |
| BNP FP Equity | 3 | Financials | FRANCE | BNP PARIBAS |
| INGA NA Equity | 3 | Financials | NETHERLANDS | ING GROEP NV |
| SAN SQ Equity | 3 | Financials | SPAIN | BANCO SANTANDER SA |
| AIR FP Equity | 3 | Industrials | FRANCE | AIRBUS SE |
| DG FP Equity | 3 | Industrials | FRANCE | VINCI SA |
| SAF FP Equity | 3 | Industrials | FRANCE | SAFRAN SA |
| AMS SQ Equity | 3 | Information Technology | SPAIN | AMADEUS IT GROUP SA |
| NOKIA FH Equity | 3 | Information Technology | FINLAND | NOKIA OYJ |
| ADYEN NA Equity | 4 | Information Technology | NETHERLANDS | ADYEN NV |
# Price history of ADYEN, the only member of cluster 4.
viz.easy_plot([exx.loc[:, 'ADYEN NA Equity']])
# Supervised view of the clustering: the standardised metrics are the
# features and the k-means cluster label is the target.
new_problem = pd.concat([all_ids.T, abcd['cluster']], axis=1)
X = new_problem.iloc[:, :-1]
y = new_problem.iloc[:, -1]

from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# A small tree (at most 8 leaves) to see which metrics separate the clusters.
clf = DecisionTreeClassifier(max_leaf_nodes=8, random_state=0)
clf.fit(X, y)

# The figure size has to be chosen before drawing. Setting
# rcParams["figure.figsize"] after plot_tree had no effect on the tree and
# only shrank the next figure to 1x1 inch, so the figure is sized explicitly.
tree_fig, tree_ax = plt.subplots(figsize=(15, 10))
# feature_names is passed as a plain list: some scikit-learn releases reject
# a pandas Index for this parameter.
tree.plot_tree(clf, feature_names=list(X.columns), ax=tree_ax)
plt.show()
L'indice de Gini mesure l'impureté de la classification : $G = 1 - \sum_{i=1}^{n} p_i^{2}$, où $p_i$ est la proportion d'observations appartenant à la classe $i$.
# Gini impurity of the root node: 50 stocks split into clusters of
# 9, 11, 18, 11 and 1 members.
N = 50
gini = 1
for cluster_size in (9, 11, 18, 11, 1):
    gini -= (cluster_size / N) ** 2
gini
0.7408000000000001
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets

# Create random forest classifier object
clf = RandomForestClassifier(random_state=0, n_jobs=-1)

# Train model
model = clf.fit(X, y)

# Calculate feature importances
importances = model.feature_importances_

# Sort feature importances in descending order
indices = np.argsort(importances)[::-1]

# Rearrange feature names so they match the sorted feature importances.
# The names must be reordered with the same `indices` as the bars; otherwise
# every bar is labelled with the wrong feature.
names = X.columns[indices]

# The figure size must be set before the figure is created; setting it after
# plt.bar had no effect on this plot.
plt.rcParams["figure.figsize"] = (15, 10)
plt.figure()

# Barplot: Add bars
plt.bar(range(X.shape[1]), importances[indices])

# Add feature names as x-axis labels
plt.xticks(range(X.shape[1]), names, rotation=75, fontsize=8)

# Create plot title
plt.title("Feature Importance")

# Show plot
plt.show()
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Daily returns of every column of `exx`
d = exx.pct_change()

# Pairwise correlation of those returns
corr = d.corr()

# Hide the upper triangle (diagonal included): the matrix is symmetric
mask = np.triu(np.ones(corr.shape, dtype=bool))

# Create the figure that will hold the heatmap
f, ax = plt.subplots(figsize=(11, 9))

# Diverging colormap centred on zero correlation
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Draw the masked heatmap with square cells
heatmap_options = dict(
    mask=mask,
    cmap=cmap,
    vmax=1,
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
)
sns.heatmap(corr, **heatmap_options)
<matplotlib.axes._subplots.AxesSubplot at 0x7f824e67a2e0>
# Plot the one-period returns of every column of `exx`.
viz.easy_plot(exx.pct_change(periods=1))